# --------------------------------------------------------------------------------------------------------------#  
# Replication code for: "Enforcing Compulsory Schooling by Linking Welfare Payments to School Attendance: 
#                        Lessons from Australia's Northern Territory"
#
# Author: Kyle Peyton (kyle.peyton@yale.edu)
#
# This file covers exploratory analyses: 
#   1) Graphing trends in indigenous NAPLAN participation across time by State and domain 
#   2) Graphing trends in indigenous students above minimum standard across time by State and domain 
#   3) Tabulating average differences between Indigenous populations by State using NATSISS 2008
#
# created 14 February 2014  
# last update 25 July 2014
# --------------------------------------------------------------------------------------------------------------# 

require(foreign)
require(Synth)
require(kernlab)
require(reshape)
require(ggplot2)
require(scales)
require(ggthemes)
require(gridExtra)
require(reshape2)
require(xtable)

####--------Table 1: differences between Indigenous populations by State------#####

natsiss <- read.dta("natsiss08.dta")

natsissvars <- c("speakindig","idclan","homeland","ceremony","community","crisissupport","removed","removed2","psych1","psych2","alcohol4","smoker","nodisable","year9","emp",
                 "unemp","nilf","fin1","fin2")

tabout <- t(natsiss[,names(natsiss)%in% natsissvars])
colnames(tabout) <- unique(natsiss$state)
xtable(tabout)

####-----------------------Table 4: NAPLAN outcomes---------------------------####
naplan <- read.dta("naplanclean.dta")

### Table 4: subset that creates "extract" determines table column
extract <- naplan[naplan$jurisdiction %in% c("AUS") & naplan$indig == 0,names(naplan)%in% c("jurisdiction","year","grade","part_pct",
                                                                                                    "domain","abovemin","passrate")]
extract <- droplevels(extract)
domain <- c("read","math")

participate <- list(read8=NA,read9=NA,read10=NA,read11=NA,read12=NA,math8=NA,math9=NA,math10=NA,math11=NA,math12=NA)
j = 1  
for(i in 1:length(domain)) {  
  for(k in 2008:2012) {    
    participate[j] <- data.frame(tapply(extract[extract$domain==domain[i] & extract$year==k,]$part_pct,
                                        extract[extract$domain==domain[i] & extract$year==k,]$jurisdiction,mean))
    j =  j + 1     
  }
}

abovemin <- list(read8=NA,read9=NA,read10=NA,read11=NA,read12=NA,math8=NA,math9=NA,math10=NA,math11=NA,math12=NA)
j = 1  
for(i in 1:length(domain)) {  
  for(k in 2008:2012) {    
    abovemin[j] <- data.frame(tapply(extract[extract$domain==domain[i] & extract$year==k,]$abovemin,
                                     extract[extract$domain==domain[i] & extract$year==k,]$jurisdiction,mean))
    j =  j + 1     
  }
}

passrate <- list(read8=NA,read9=NA,read10=NA,read11=NA,read12=NA,math8=NA,math9=NA,math10=NA,math11=NA,math12=NA)
j = 1  
for(i in 1:length(domain)) {  
  for(k in 2008:2012) {    
    passrate[j] <- data.frame(tapply(extract[extract$domain==domain[i] & extract$year==k,]$passrate,
                                     extract[extract$domain==domain[i] & extract$year==k,]$jurisdiction,mean))
    j =  j + 1     
  }
}

## combine information for output via xtable 
output<- list()
year <- c(2008:2012)
for(i in 1:5) {
  temp <- data.frame(data.frame(participate[i]),data.frame(abovemin[i]),data.frame(passrate[i]))
  colnames(temp) <- c("Participation","Above minimum","Pass rate")
  temp$year <- year[i]
  temp$domain <- "read"
  output[[i]] <- temp
}

for(i in 6:10) {
  temp <- data.frame(data.frame(participate[i]),data.frame(abovemin[i]),data.frame(passrate[i]))
  colnames(temp) <- c("Participation","Above minimum","Pass rate")
  temp$year <- year[i-5]
  temp$domain <- "math"
  output[[i]] <- temp
}

table <- data.frame(matrix(NA,10,5))
for(i in 1:10){
table[i,] <- output[[i]]
}
colnames(table) <- colnames(output[[1]])
xtable(table)

####--------Trend plots for indigenous students participating in NAPLAN---------####
indig <- naplan[naplan$indig==1,]

mathplots <- list(math3=NA,math5=NA,math7=NA,math9=NA)
k <- 1
for(i in c(3,5,7)) {
mathplots[[k]] <- ggplot(data=indig[indig$grade == i & indig$domain=="math",], aes(x=year, y=part_pct, group=as.factor(jurisdiction), colour = as.factor(nt))) + 
    geom_line(size=1)  +
    ylab("Indigenous students participating in NAPLAN") + xlab(NULL) + 
   ggtitle(paste("Year",i)) +
    theme_bw() +  
    scale_y_continuous(breaks=seq(.6,1,.10), limits = c(.6,1), labels = percent_format()) +
    coord_cartesian(xlim = c(2008,2012)) +
    theme(axis.text.x=element_text(angle=0),
          panel.grid.minor.x=element_blank(), 
          panel.grid.minor.y=element_blank(),
          panel.grid.major.x=element_blank(),
          panel.grid.major.y=element_blank(),
          legend.position = c(0.85,0.15),
          legend.justification = c(1,0),
          legend.text = element_text(color= 'black', size = 12),
          legend.key = element_rect(fill = 'white', size = 0.5, linetype='solid'),
          legend.key.size = unit(1.5, 'lines'),
          legend.background = element_rect(fill="transparent"),
          legend.title=element_blank(),
          text = element_text(family = "serif", size = 16))   +
    scale_color_manual(breaks = c("0","1"), values=c("gray","black"), labels = c("Other Australian States/Territories","Northern Territory")) +
    geom_vline(xintercept = 2009, col = "black", lty=3, size = 1) 
k <- k+1  
  
}


mathplots[[k]] <- ggplot(data=indig[indig$grade == 9 & indig$domain=="math",], aes(x=year, y=part_pct, group=as.factor(jurisdiction), colour = as.factor(nt))) + 
    geom_line(size=1)  +
    ylab("Indigenous students participating in NAPLAN") + xlab(NULL) + 
    ggtitle(paste("Year",9)) +
    theme_bw() +  
    scale_y_continuous(breaks=seq(.6,1,.10), limits = c(.6,1), labels = percent_format()) +
    coord_cartesian(xlim = c(2008,2012)) +
    theme(axis.text.x=element_text(angle=0),
          panel.grid.minor.x=element_blank(), 
          panel.grid.minor.y=element_blank(),
          panel.grid.major.x=element_blank(),
          panel.grid.major.y=element_blank(),
          legend.position = c(0.85,0.75),
          legend.justification = c(1,0),
          legend.text = element_text(color= 'black', size = 12),
          legend.key = element_rect(fill = 'white', size = 0.5, linetype='solid'),
          legend.key.size = unit(1.5, 'lines'),
          legend.background = element_rect(fill="transparent"),
          legend.title=element_blank(),
          text = element_text(family = "serif", size = 16))   +
    scale_color_manual(breaks = c("0","1"), values=c("gray","black"), labels = c("Other Australian States/Territories","Northern Territory")) +
    geom_vline(xintercept = 2009, col = "black", lty=3, size = 1) 



math1 <-mathplots[[1]]
math2 <-mathplots[[2]]
math3 <-mathplots[[3]]
math4 <-mathplots[[4]]

source("multiplot.R")


pdf("trendmathparticipate.pdf",width=17,height=12) 
multiplot(math1,math3,math2,math4, cols = 2)
dev.off()

## generate participation plots for reading 
readplots <- list(read3=NA,read5=NA,read7=NA,read9=NA)
k <- 1
for(i in c(3,5,7)) {
  readplots[[k]] <- ggplot(data=indig[indig$grade == i & indig$domain=="read",], aes(x=year, y=part_pct, group=as.factor(jurisdiction), colour = as.factor(nt))) + 
    geom_line(size=1)  +
    ylab("Indigenous students participating in NAPLAN") + xlab(NULL) + 
    ggtitle(paste("Year",i)) +
    theme_bw() +  
    scale_y_continuous(breaks=seq(.6,1,.10), limits = c(.6,1), labels = percent_format()) +
    coord_cartesian(xlim = c(2008,2012)) +
    theme(axis.text.x=element_text(angle=0),
          panel.grid.minor.x=element_blank(), 
          panel.grid.minor.y=element_blank(),
          panel.grid.major.x=element_blank(),
          panel.grid.major.y=element_blank(),
          legend.position = c(0.85,0.15),
          legend.justification = c(1,0),
          legend.text = element_text(color= 'black', size = 12),
          legend.key = element_rect(fill = 'white', size = 0.5, linetype='solid'),
          legend.key.size = unit(1.5, 'lines'),
          legend.background = element_rect(fill="transparent"),
          legend.title=element_blank(),
          text = element_text(family = "serif", size = 16))   +
    scale_color_manual(breaks = c("0","1"), values=c("gray","black"), labels = c("Other Australian States/Territories","Northern Territory")) +
    geom_vline(xintercept = 2009, col = "black", lty=3, size = 1) 
  k <- k+1  
  
}


readplots[[k]] <- ggplot(data=indig[indig$grade == 9 & indig$domain=="read",], aes(x=year, y=part_pct, group=as.factor(jurisdiction), colour = as.factor(nt))) + 
  geom_line(size=1)  +
  ylab("Indigenous students participating in NAPLAN") + xlab(NULL) + 
  ggtitle(paste("Year",9)) +
  theme_bw() +  
  scale_y_continuous(breaks=seq(.6,1,.10), limits = c(.6,1), labels = percent_format()) +
  coord_cartesian(xlim = c(2008,2012)) +
  theme(axis.text.x=element_text(angle=0),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(),
        panel.grid.major.y=element_blank(),
        legend.position = c(0.85,0.75),
        legend.justification = c(1,0),
        legend.text = element_text(color= 'black', size = 12),
        legend.key = element_rect(fill = 'white', size = 0.5, linetype='solid'),
        legend.key.size = unit(1.5, 'lines'),
        legend.background = element_rect(fill="transparent"),
        legend.title=element_blank(),
        text = element_text(family = "serif", size = 16))   +
  scale_color_manual(breaks = c("0","1"), values=c("gray","black"), labels = c("Other Australian States/Territories","Northern Territory")) +
  geom_vline(xintercept = 2009, col = "black", lty=3, size = 1) 


read1 <-readplots[[1]]
read2 <-readplots[[2]]
read3 <-readplots[[3]]
read4 <-readplots[[4]]


pdf("trendreadparticipate.pdf",width=17,height=12) 
multiplot(read1,read3,read2,read4, cols = 2)
dev.off()

####--------Trend plots for indigenous students above minimum standards ---------####

## generate participation plots for math 
mathplots <- list(math3=NA,math5=NA,math7=NA,math9=NA)
k <- 1
for(i in c(3,5,7)) {
  mathplots[[k]] <- ggplot(data=indig[indig$grade == i & indig$domain=="math",], aes(x=year, y=abovemin, group=as.factor(jurisdiction), colour = as.factor(nt))) + 
    geom_line(size=1)  +
    ylab("Indigenous students above minimum standard") + xlab(NULL) + 
    ggtitle(paste("Year",i)) +
    theme_bw() +  
    scale_y_continuous(breaks=seq(0,1,.10), limits = c(0,1), labels = percent_format()) +
    coord_cartesian(xlim = c(2008,2012)) +
    theme(axis.text.x=element_text(angle=0),
          panel.grid.minor.x=element_blank(), 
          panel.grid.minor.y=element_blank(),
          panel.grid.major.x=element_blank(),
          panel.grid.major.y=element_blank(),
          legend.position = c(0.75,0.10),
          legend.justification = c(1,0),
          legend.text = element_text(color= 'black', size = 12),
          legend.key = element_rect(fill = 'white', size = 0.5, linetype='solid'),
          legend.key.size = unit(1.5, 'lines'),
          legend.background = element_rect(fill="transparent"),
          legend.title=element_blank(),
          text = element_text(family = "serif", size = 16))   +
    scale_color_manual(breaks = c("0","1"), values=c("gray","black"), labels = c("Other Australian States/Territories","Northern Territory")) +
    geom_vline(xintercept = 2009, col = "black", lty=3, size = 1) 
  k <- k+1  
  
}


mathplots[[k]] <- ggplot(data=indig[indig$grade == 9 & indig$domain=="math",], aes(x=year, y=abovemin, group=as.factor(jurisdiction), colour = as.factor(nt))) + 
  geom_line(size=1)  +
  ylab("Indigenous students above minimum standard") + xlab(NULL) + 
  ggtitle(paste("Year",9)) +
  theme_bw() +  
  scale_y_continuous(breaks=seq(0,1,.10), limits = c(0,1), labels = percent_format()) +
  coord_cartesian(xlim = c(2008,2012)) +
  theme(axis.text.x=element_text(angle=0),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(),
        panel.grid.major.y=element_blank(),
        legend.position = c(0.75,0.75),
        legend.justification = c(1,0),
        legend.text = element_text(color= 'black', size = 12),
        legend.key = element_rect(fill = 'white', size = 0.5, linetype='solid'),
        legend.key.size = unit(1.5, 'lines'),
        legend.background = element_rect(fill="transparent"),
        legend.title=element_blank(),
        text = element_text(family = "serif", size = 16))   +
  scale_color_manual(breaks = c("0","1"), values=c("gray","black"), labels = c("Other Australian States/Territories","Northern Territory")) +
  geom_vline(xintercept = 2009, col = "black", lty=3, size = 1) 



math1 <-mathplots[[1]]
math2 <-mathplots[[2]]
math3 <-mathplots[[3]]
math4 <-mathplots[[4]]

pdf("trendmathabovemin.pdf",width=17,height=12) 
multiplot(math1,math3,math2,math4, cols = 2)
dev.off()


## generate plots for reading 
readplots <- list(read3=NA,read5=NA,read7=NA,read9=NA)
k <- 1
for(i in c(3,5,7)) {
  readplots[[k]] <- ggplot(data=indig[indig$grade == i & indig$domain=="read",], aes(x=year, y=abovemin, group=as.factor(jurisdiction), colour = as.factor(nt))) + 
    geom_line(size=1)  +
    ylab("Indigenous students above minimum standard") + xlab(NULL) + 
    ggtitle(paste("Year",i)) +
    theme_bw() +  
    scale_y_continuous(breaks=seq(0,1,.10), limits = c(0,1), labels = percent_format()) +
    coord_cartesian(xlim = c(2008,2012)) +
    theme(axis.text.x=element_text(angle=0),
          panel.grid.minor.x=element_blank(), 
          panel.grid.minor.y=element_blank(),
          panel.grid.major.x=element_blank(),
          panel.grid.major.y=element_blank(),
          legend.position = c(0.75,0.05),
          legend.justification = c(1,0),
          legend.text = element_text(color= 'black', size = 12),
          legend.key = element_rect(fill = 'white', size = 0.5, linetype='solid'),
          legend.key.size = unit(1.5, 'lines'),
          legend.background = element_rect(fill="transparent"),
          legend.title=element_blank(),
          text = element_text(family = "serif", size = 16))   +
    scale_color_manual(breaks = c("0","1"), values=c("gray","black"), labels = c("Other Australian States/Territories","Northern Territory")) +
    geom_vline(xintercept = 2009, col = "black", lty=3, size = 1) 
  k <- k+1  
  
}


readplots[[k]] <- ggplot(data=indig[indig$grade == 9 & indig$domain=="read",], aes(x=year, y=abovemin, group=as.factor(jurisdiction), colour = as.factor(nt))) + 
  geom_line(size=1)  +
  ylab("Indigenous students above minimum standard") + xlab(NULL) + 
  ggtitle(paste("Year",9)) +
  theme_bw() +  
  scale_y_continuous(breaks=seq(0,1,.10), limits = c(0,1), labels = percent_format()) +
  coord_cartesian(xlim = c(2008,2012)) +
  theme(axis.text.x=element_text(angle=0),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(),
        panel.grid.major.y=element_blank(),
        legend.position = c(0.85,0.75),
        legend.justification = c(1,0),
        legend.text = element_text(color= 'black', size = 12),
        legend.key = element_rect(fill = 'white', size = 0.5, linetype='solid'),
        legend.key.size = unit(1.5, 'lines'),
        legend.background = element_rect(fill="transparent"),
        legend.title=element_blank(),
        text = element_text(family = "serif", size = 16))   +
  scale_color_manual(breaks = c("0","1"), values=c("gray","black"), labels = c("Other Australian States/Territories","Northern Territory")) +
  geom_vline(xintercept = 2009, col = "black", lty=3, size = 1) 


read1 <-readplots[[1]]
read2 <-readplots[[2]]
read3 <-readplots[[3]]
read4 <-readplots[[4]]

pdf("trendreadabovemin.pdf",width=17,height=12) 
multiplot(read1,read3,read2,read4, cols = 2)
dev.off()

####-----------------Appendix: Tables to accompany trend plots------------------####

mathlist <- list()
readlist <- list()
j = 1
for(i in c(3,5,7,9)) {
  math1 <- dcast(indig[indig$domain=="math" & indig$grade==i,], jurisdiction~year, value.var="part_pct")
  math2 <- dcast(indig[indig$domain=="math" & indig$grade==i,], jurisdiction~year, value.var="abovemin")
  math3 <- merge(math1,math2,by="jurisdiction")
  math3$jurisdiction <- factor(math3$jurisdiction,levels = c("NT","WA","QLD","NSW","VIC","SA","TAS","ACT"),ordered = T)
  mathlist[[j]] <- math3[with(math3,order(jurisdiction)),]
  
  read1 <- dcast(indig[indig$domain=="read" & indig$grade==i,], jurisdiction~year, value.var="part_pct")
  read2 <- dcast(indig[indig$domain=="read" & indig$grade==i,], jurisdiction~year, value.var="abovemin")
  read3 <- merge(read1,read2,by="jurisdiction")
  read3$jurisdiction <- factor(read3$jurisdiction,levels = c("NT","WA","QLD","NSW","VIC","SA","TAS","ACT"),ordered = T)
  readlist[[j]] <- read3[with(read3,order(jurisdiction)),]
  
  j = j + 1
}

for(i in 1:4) {
  print(xtable(mathlist[[i]]))
  print(xtable(readlist[[i]]))  
}





